Analyses of Swisscom data

API data

Data

Grid

Testing on several PLZs of Bern & surroundings.

# Load the grid for the Bern test postcodes and rename `tileId` to `tile_id`,
# the column name selected from the Swisscom files further down.
bern_plz <- rename(
  read_rds("data/grid/bern_plz.Rds"),
  tile_id = tileId
)

# Result of sf's centroid computation on the grid geometries.
bern_plz_ct <- st_centroid(x = bern_plz)
x <character> 
# total N=6965 valid N=6965 mean=3044.06 sd=37.39

Value |    N | Raw % | Valid % | Cum. %
---------------------------------------
 3005 |  198 |  2.84 |    2.84 |   2.84
 3006 |  604 |  8.67 |    8.67 |  11.51
 3007 |  254 |  3.65 |    3.65 |  15.16
 3008 |  445 |  6.39 |    6.39 |  21.55
 3010 |   28 |  0.40 |    0.40 |  21.95
 3011 |  138 |  1.98 |    1.98 |  23.93
 3012 |  581 |  8.34 |    8.34 |  32.28
 3013 |  176 |  2.53 |    2.53 |  34.80
 3014 |  366 |  5.25 |    5.25 |  40.06
 3018 |  590 |  8.47 |    8.47 |  48.53
 3027 |  720 | 10.34 |   10.34 |  58.87
 3073 |  509 |  7.31 |    7.31 |  66.17
 3074 |  389 |  5.59 |    5.59 |  71.76
 3084 |  526 |  7.55 |    7.55 |  79.31
 3095 |  152 |  2.18 |    2.18 |  81.49
 3097 |  182 |  2.61 |    2.61 |  84.11
 3098 | 1107 | 15.89 |   15.89 | 100.00
 <NA> |    0 |  0.00 |    <NA> |   <NA>

Dwell density

Data are from http://mip.swisscom.ch, which Swisscom describes as:

Our new API platform offering 3 endpoints focusing on density, dwell times and origin destination

Important note: free data is limited to 2020-01-27 only!

We are using the Heatmaps API to retrieve daily and hourly dwell times for one postcode. Code to retrieve the data, kindly provided by Yann Steimer from Swisscom, is in example_notebook_SC_heatmaps_API_UNIBE.ipynb.

Daily dwell density

# Read one Swisscom Heatmaps export and tag every row with its postcode.
#
# `filename` is the path to a semicolon-delimited CSV named `<plz>_day.csv`
# (the `<plz>_hour.csv` naming is accepted as well). Returns a tibble with the
# columns `tile_id`, `time` and `score`, plus `plz`, the postcode taken from
# the file name.
read_fun <- function(filename) {

  raw <- readr::read_delim(filename,
                           delim = ";", escape_double = FALSE, trim_ws = TRUE,
                           show_col_types = FALSE)

  # Plain function calls instead of `%>%`, so the function does not rely on
  # the pipe being attached in the session that evaluates it.
  data <- dplyr::as_tibble(dplyr::select(raw, tile_id, time, score))

  # Take the postcode from the bare file name so it does not depend on the
  # directory the file lives in. The patterns are anchored: the earlier
  # "_day|.csv" treated "." as a wildcard and could match anywhere in the path.
  stem <- sub("[.]csv$", "", basename(filename))
  data$plz <- sub("_(day|hour)$", "", stem)

  data
}

# Register the future-based foreach adapter; the `plyr::ldply()` calls in this
# document request parallel execution via `.parallel = TRUE`.
doFuture::registerDoFuture()

# Use up to 8 background R sessions, but never more than the machine offers,
# so the script does not oversubscribe smaller machines.
future::plan("multisession", workers = min(8L, future::availableCores()))

# Read every `<digit>_day.csv` file found in data/swisscom/ with `read_fun()`
# and stack the results; `.id = NULL` suppresses the extra index column.
data_day <- plyr::ldply(
  fs::dir_ls(path = "data/swisscom/", regexp = "[0-9]_day[.]csv$"),
  read_fun,
  .parallel = TRUE,
  .id = NULL
)

Hourly dwell density

# Read one Swisscom Heatmaps export and tag every row with its postcode.
#
# `filename` is the path to a semicolon-delimited CSV named `<plz>_hour.csv`
# (the `<plz>_day.csv` naming is accepted as well). Returns a tibble with the
# columns `tile_id`, `time` and `score`, plus `plz`, the postcode taken from
# the file name.
read_fun <- function(filename) {

  raw <- readr::read_delim(filename,
                           delim = ";", escape_double = FALSE, trim_ws = TRUE,
                           show_col_types = FALSE)

  # Plain function calls instead of `%>%`, so the function does not rely on
  # the pipe being attached in the session that evaluates it.
  data <- dplyr::as_tibble(dplyr::select(raw, tile_id, time, score))

  # Take the postcode from the bare file name so it does not depend on the
  # directory the file lives in. The patterns are anchored: the earlier
  # "_hour|.csv" treated "." as a wildcard and could match anywhere in the path.
  stem <- sub("[.]csv$", "", basename(filename))
  data$plz <- sub("_(day|hour)$", "", stem)

  data
}

# Read every `<digit>_hour.csv` file found in data/swisscom/ with `read_fun()`
# and stack the results; `.id = NULL` suppresses the extra index column.
data_hour <- plyr::ldply(
  fs::dir_ls(path = "data/swisscom/", regexp = "[0-9]_hour[.]csv$"),
  read_fun,
  .parallel = TRUE,
  .id = NULL
)

EDA

Daily

# Attach the daily scores to the grid. `time` is dropped before the join;
# the join keys are whatever columns the two tables have in common.
bern_plz_day <- left_join(bern_plz, select(data_day, -time))

Hourly

# Attach the hourly scores (including `time`) to the grid; the join keys are
# whatever columns the two tables have in common.
bern_plz_hour <- left_join(x = bern_plz, y = data_hour)